# -*- coding: utf-8 -*-
from selenium import webdriver
import time
from bs4 import BeautifulSoup
import requests
import re
import shutil
import os
import json
import argparse
import traceback

def execute_times(driver, times):
    """Scroll the page to the bottom repeatedly to trigger lazy loading.

    Note: ``range(times + 1)`` means the scroll actually runs ``times + 1``
    times — preserved exactly as the original behaved.

    Args:
        driver: a Selenium WebDriver instance.
        times: number of extra scrolls beyond the first.
    """
    scroll_count = times + 1
    for _ in range(scroll_count):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)


def save_cookies(driver):
    """Persist the driver's current session cookies to ``cookies.json``.

    Fix: the original did ``open(...).write(...)`` without ever closing the
    handle; a context manager guarantees the file is flushed and closed, and
    ``json.dump`` writes the JSON directly in text mode.

    Args:
        driver: a Selenium WebDriver instance exposing ``get_cookies()``.
    """
    with open('cookies.json', 'w', encoding='utf-8') as f:
        json.dump(driver.get_cookies(), f)

def set_cookies(driver, dcoo):
    """Replay previously saved cookies into the browser session.

    Args:
        driver: a Selenium WebDriver instance exposing ``add_cookie``.
        dcoo: iterable of cookie dicts (as produced by ``get_cookies``).
    """
    add_cookie = driver.add_cookie
    for cookie in dcoo:
        add_cookie(cookie)


def login(driver):
    """Block until the operator finishes logging in manually in the browser
    window, then persist the session cookies for future runs."""
    input(u'等待登錄完成继续:')  # prompt: "press Enter once login is complete"
    save_cookies(driver)


def process_download(url, index):
    """Download the image at *url* into ``output/<index>.jpg``.

    Fixes over the original:
    - ``os.makedirs(..., exist_ok=True)`` replaces the isdir pre-check and
      the odd ``shutil.os.makedirs`` spelling (``shutil.os`` is just ``os``).
    - The request gets a timeout so a stalled connection can't hang forever.
    - ``raise_for_status()`` prevents silently saving an HTML error page
      with a ``.jpg`` name.
    - Redundant ``f.flush()`` / ``f.close()`` inside the ``with`` removed —
      the context manager already guarantees both.

    Args:
        url: direct image URL.
        index: ordinal used as the output file name.

    Raises:
        requests.HTTPError: when the server responds with an error status.
    """
    out_dir = 'output'
    os.makedirs(out_dir, exist_ok=True)
    fn = os.path.join(out_dir, f"{index}.jpg")
    result = requests.get(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'},
        timeout=30,
    )
    result.raise_for_status()
    with open(fn, 'wb') as f:
        f.write(result.content)
    print(f'{url} >> {index}.jpg')



def process_page(driver, url, urlset):
    """Scrape image URLs from *url* by scrolling the page and downloading
    every ``data-original="https://...jpg"`` match in the page source.

    Args:
        driver: a Selenium WebDriver instance.
        url: page to scrape (a Zhihu question URL, per the CLI help text).
        urlset: dict used both as a seen-set (image URL -> index) and as a
            carrier for the running counter under the special key
            ``'index'``. Mutated in place so progress can persist.
    """
    driver.get(url) 
    # If redirected to a sign-in page, try replaying saved cookies first;
    # fall back to an interactive login (which saves fresh cookies).
    while 'signin' in driver.current_url:
        dcoo = json.load(open('cookies.json', 'rb')) if os.path.isfile('cookies.json') else []
        if dcoo:
            set_cookies(driver, dcoo)
        else:
            login(driver)
        driver.get(url) 

    # Resume the image counter from the last saved value, if any.
    index = int(urlset['index'] if 'index' in urlset else 0)
    pat = re.compile(r'data-original=\"(https://.*?jpg)\"')
    bb = 0       # offset into the HTML where the next findall scan starts
    phtml = ''   # previous page_source, used to detect "page stopped growing"
    page = 0
    while True:
        html = driver.page_source
        print(f"page size:{len(html)}")
        mats = pat.findall(html, pos=bb)
        for m in mats:
            if m in urlset:
                continue
            urlset[m] = index
            # NOTE(review): the file is named index+1 while urlset stores
            # index, so the saved mapping is off by one from the file names
            # on disk — confirm this is intended.
            process_download(m, index + 1)
            index = index + 1
        if mats:
            # Advance the scan offset past the last match so re-scans of the
            # grown page don't reprocess earlier HTML.
            tmp = f'data-original=\"{mats[-1]}\"'
            bb = html.find(tmp) + len(tmp)
        # NOTE(review): if find() above returned -1, bb would be
        # -1 + len(tmp) (a small positive number), so this guard can
        # essentially never fire — looks like a latent bug; confirm.
        if bb < 0:
            bb = 0
        page += 1
        # When the page has stopped growing, ask the operator whether to stop.
        # NOTE(review): phtml is never reassigned to html inside the loop, so
        # this comparison only matches when page_source is empty — the
        # end-of-page prompt likely never triggers; confirm intended.
        if html == phtml and input(f'seems got end of page, check will continue ?') == 'no':
            break
        # Scroll to ~80% then 100% of the page height to trigger lazy loading.
        driver.execute_script(f"window.scrollTo(0, document.body.scrollHeight * 0.8)")
        time.sleep(0.5)
        driver.execute_script(f"window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(0.5)
    # Store the counter back into the dict so a later run can resume.
    urlset['index'] = index

def read_url_set():
    """Load the persisted url->index map from ``output/__urls.json``.

    Returns:
        The parsed dict, or an empty dict when the output directory or the
        file does not exist.
    """
    path = os.path.join('output', '__urls.json')
    if not (os.path.isdir('output') and os.path.isfile(path)):
        return {}
    with open(path, 'rt') as fp:
        return json.load(fp)

def write_url_set(urls):
    """Persist the url->index map to ``output/__urls.json``.

    Fixes over the original:
    - ``os.makedirs(..., exist_ok=True)`` replaces the isdir pre-check and
      the odd ``shutil.os.makedirs`` spelling (``shutil.os`` is just ``os``).
    - Written in text mode via ``json.dump`` instead of manually encoding
      to bytes.

    Args:
        urls: dict mapping image URL -> index (plus the ``'index'`` counter).
    """
    os.makedirs('output', exist_ok=True)
    fn = os.path.join('output', '__urls.json')
    with open(fn, 'w', encoding='utf-8') as f:
        json.dump(urls, f)

def main(chrome, url):
    """Entry point: resolve the chromedriver path and target URL, then scrape.

    Fixes over the original:
    - Bare ``except:`` narrowed to ``except Exception:`` so Ctrl-C
      (KeyboardInterrupt) still interrupts the run.
    - ``driver.quit()`` moved into ``finally`` so the browser is always
      released, even on unexpected errors.
    - ``write_url_set(urls)`` is now called — the original collected URLs
      into ``urls`` (and ``read_url_set`` reloads them) but never saved
      them, losing all progress between runs.

    Args:
        chrome: path to the chromedriver binary, or falsy to auto-detect a
            local ``chromedriver`` / prompt interactively.
        url: question URL to scrape, or falsy to prompt interactively.
    """
    if not chrome:
        # Prefer a chromedriver sitting next to the script; otherwise ask.
        if os.path.isfile('chromedriver'):
            chrome = 'chromedriver'
        else:
            chrome = input('输入webchrome:').strip()

    if not url:
        url = input('输入url:').strip()

    urls = read_url_set()
    # NOTE: executable_path is deprecated in Selenium 4; kept for
    # compatibility with the Selenium version this script targets.
    driver = webdriver.Chrome(executable_path=chrome)
    try:
        process_page(driver, url, urls)
    except Exception:
        traceback.print_exc()
    finally:
        # Persist scraped progress and always release the browser.
        write_url_set(urls)
        driver.quit()


if __name__ == '__main__':
    # CLI entry: both flags are optional; main() prompts interactively for
    # any value not supplied on the command line.
    arg_parser = argparse.ArgumentParser(description=u"嘿嘿嘿, 知乎图片全搞定")
    arg_parser.add_argument('-chrome', dest='chrome', type=str, help='webchrome path')
    arg_parser.add_argument('-url', dest='url', type=str, help='question url like:https://www.zhihu.com/question/26037846')
    parsed = arg_parser.parse_args()
    main(parsed.chrome, parsed.url)